Perfect Plots: Bubble Plot [definitions]

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [17]:
# Source file for the car data set (path is specific to the author's machine).
autos_csv = '/home/wojciech/Pulpit/1/autos.csv'
df2 = pd.read_csv(autos_csv)

# Preview the first three rows.
df2.head(3)
Out[17]:
Unnamed: 0 symboling normalized_losses make fuel_type aspiration num_doors body_style drive_wheels engine_location ... engine_size fuel_system bore stroke compression_ratio horsepower peak_rpm city_mpg highway_mpg price
0 0 3 NaN alfa-romero gas std two convertible rwd front ... 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 13495.0
1 1 3 NaN alfa-romero gas std two convertible rwd front ... 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 16500.0
2 2 1 NaN alfa-romero gas std two hatchback rwd front ... 152 mpfi 2.68 3.47 9.0 154.0 5000.0 19 26 16500.0

3 rows × 27 columns

I create a synthetic variable, `city_mpg2`, by scaling `city_mpg` so that the bubbles are large enough to see. Bubble size therefore reflects city fuel economy (miles per gallon).

In [3]:
# Scale factor that turns city_mpg values into visible marker sizes.
BUBBLE_SCALE = 30
df2['city_mpg2'] = BUBBLE_SCALE * df2['city_mpg']
In [4]:
def Bubble_Plot_1(df, X, Y, size, kolor, title):
    """Draw a bubble (scatter) plot from columns of ``df`` and display it.

    Parameters
    ----------
    df : pandas.DataFrame
        Table passed to matplotlib as ``data``; string arguments below are
        resolved as column names of this table.
    X, Y : str
        Columns plotted on the x and y axes; also used as the axis labels.
    size : str
        Column forwarded to ``s`` (marker size) of ``Axes.scatter``.
    kolor : str
        Column forwarded to ``c``; values are mapped through the 'PuBu'
        colormap and explained by the colorbar.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Uses the notebook-level ``plt`` import; no need to re-import it here.
    fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

    # Other colormaps that were tried: 'YlGn', 'PuBu', 'YlOrBr', 'RdYlGn'.
    bubbles = ax.scatter(X, Y, data=df, s=size, c=kolor, cmap='PuBu',
                         edgecolors='grey', linewidths=0.8)

    ax.set_title(title, fontsize=16)
    ax.set_xlabel(X, fontsize=18)
    ax.set_ylabel(Y, fontsize=18)

    # Bind the colorbar to the scatter artist explicitly instead of relying on
    # pyplot's implicit "current mappable".
    fig.colorbar(bubbles, ax=ax)

    plt.show()
In [5]:
# Cars: horsepower vs. engine size, bubble size from city_mpg2, colour from price.
df = df2
X, Y = 'horsepower', 'engine_size'
size, kolor = 'city_mpg2', 'price'
title = 'Car comparison'  # the title is typed in by hand

Bubble_Plot_1(df=df, X=X, Y=Y, size=size, kolor=kolor, title=title)
In [6]:
def Bubble_Plot_2(df, X, Y, size, kolor, title, title_leg, title_bub):
    """Draw a labelled bubble plot with a colorbar and a size legend, then show it.

    Parameters
    ----------
    df : pandas.DataFrame
        Table passed to matplotlib as ``data``; string arguments below are
        resolved as column names of this table.
    X, Y : str
        Columns plotted on the x and y axes; also used as the axis labels.
    size : str
        Column forwarded to ``s`` (marker size) of ``Axes.scatter``.
    kolor : str
        Column forwarded to ``c``; values are mapped through the 'RdYlGn'
        colormap and explained by the colorbar.
    title : str
        Axes title.
    title_leg : str
        Title of the marker-size legend.
    title_bub : str
        Column whose values are written next to each bubble.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

    # Other colormaps that were tried: 'YlGn', 'PuBu', 'YlOrBr', 'RdYlGn'.
    # One scatter call is enough: the returned collection feeds both the
    # colorbar and the size legend, so the points need not be drawn twice.
    bubbles = ax.scatter(X, Y, data=df, s=size, c=kolor, cmap='RdYlGn',
                         edgecolors='grey', linewidths=0.8)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel(X, fontsize=12)
    ax.set_ylabel(Y, fontsize=12)

    fig.colorbar(bubbles, ax=ax)

    # Legend describing what the marker sizes stand for.
    handles, labels = bubbles.legend_elements(prop="sizes", alpha=0.2)
    ax.legend(handles, labels, loc="upper left", title=title_leg)

    # Write the label next to every bubble. Iterating the three columns in
    # parallel is purely positional, so it also works when ``df`` does not
    # carry a default 0..n-1 index (e.g. after filtering).
    for x_val, y_val, txt in zip(df[X], df[Y], df[title_bub]):
        ax.annotate(txt, (x_val, y_val))

    plt.show()
In [7]:
# Cars again, this time with a size legend and the make written on each bubble.
df = df2
X, Y = 'horsepower', 'engine_size'
size, kolor = 'city_mpg2', 'price'
title = 'Car comparison'          # the title is typed in by hand
title_leg = 'fuel consumption'    # the legend title is typed in by hand
title_bub = 'make'

Bubble_Plot_2(df=df, X=X, Y=Y, size=size, kolor=kolor,
              title=title, title_leg=title_leg, title_bub=title_bub)

Midwest

In [8]:
# Source file for the Midwest data set (path is specific to the author's machine).
midwest_csv = '/home/wojciech/Pulpit/2/midwest_filter.csv'
df = pd.read_csv(midwest_csv)

# Preview the first rows.
df.head()
Out[8]:
PID county state area poptotal popdensity popwhite popblack popamerindian popasian ... percprof poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty inmetro category dot_size
0 561 ADAMS IL 0.052 66090 1270.961540 63917 1702 98 249 ... 4.355859 63628.0 96.274777 13.151443 18.011717 11.009776 12.443812 0.0 AAR 250.944411
1 562 ALEXANDER IL 0.014 10626 759.000000 7054 3496 19 48 ... 2.870315 10529.0 99.087145 32.244278 45.826514 27.385647 25.228976 0.0 LHR 185.781260
2 563 BOND IL 0.022 14991 681.409091 14477 429 35 16 ... 4.488572 14235.0 94.956974 12.068844 14.036061 10.852090 12.697410 0.0 AAR 175.905385
3 564 BOONE IL 0.017 30806 1812.117650 29344 127 46 150 ... 4.197800 30337.0 98.477569 7.209019 11.179536 5.536013 6.217047 1.0 ALU 319.823487
4 565 BROWN IL 0.018 5836 324.222222 5264 547 14 5 ... 3.367680 4815.0 82.505140 13.520249 13.022889 11.143211 19.200000 0.0 AAR 130.442161

5 rows × 29 columns

In [10]:
# `df` already holds the Midwest table read from midwest_filter.csv, so the
# former `df=df` self-assignment (a no-op) is dropped.
X = 'area'
Y = 'poptotal'
size = 'dot_size'
kolor = 'poptotal'
# The data are Midwest counties; the previous title 'Africa cities' was a
# copy-paste leftover.
title = 'Midwest counties'

Bubble_Plot_1(df, X, Y, size, kolor, title)
In [11]:
# `df` already holds the Midwest table read from midwest_filter.csv, so the
# former `df=df` self-assignment (a no-op) is dropped.
X = 'area'
Y = 'poptotal'
size = 'dot_size'
kolor = 'poptotal'
# The data are Midwest counties; the previous title 'Africa' was a copy-paste
# leftover.
title = 'Midwest counties'   # the title is typed in by hand
title_leg = 'dot_size'       # the legend title is typed in by hand
title_bub = 'county'

Bubble_Plot_2(df, X, Y, size, kolor, title, title_leg, title_bub)
/home/wojciech/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
/home/wojciech/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))

Diabetes

In [12]:
# Source file for the diabetes data set (path is specific to the author's machine).
diabetes_csv = '/home/wojciech/Pulpit/1/diabetes.csv'
df3 = pd.read_csv(diabetes_csv)

# Preview the first two rows.
df3.head(2)
Out[12]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0

Add a scaled BMI class column (`BMI_class`) that controls the bubble size.

In [13]:
# Bin BMI into five quantile classes (integer codes from qcut), shift the codes
# to start at 1 and multiply by 70 so each class maps to a distinct marker size.
df3['BMI_class'] = ((pd.qcut(df3['BMI'], 5, labels=False).astype(int)) + 1) * 70

# The plotting statements were collapsed onto one line (a syntax error as
# written); they are restored here as separate statements on an explicit Axes.
fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')
bubbles = ax.scatter('Age', 'Glucose', data=df3, s='BMI_class', c='BloodPressure',
                     cmap='YlOrBr', edgecolors='blue', linewidths=0.8)
ax.set_title("Bubble Plot of Diabetes\n color: BloodPressure & size: BMI", fontsize=16)
ax.set_xlabel('Age', fontsize=18)
ax.set_ylabel('Glucose', fontsize=18)
fig.colorbar(bubbles, ax=ax)
plt.show()
In [14]:
# Diabetes: age vs. glucose, bubble size from BMI_class, colour from blood pressure.
df = df3
X, Y = 'Age', 'Glucose'
size, kolor = 'BMI_class', 'BloodPressure'
title = 'Bubble Plot of Diabetes'  # the title is typed in by hand

Bubble_Plot_1(df=df, X=X, Y=Y, size=size, kolor=kolor, title=title)
In [15]:
# Bind the diabetes frame explicitly. The former `df=df` only worked when the
# previous cell had already set `df = df3`; otherwise `df` would still be
# whatever table was plotted last.
df = df3

X = 'Age'
Y = 'Glucose'
size = 'BMI_class'
kolor = 'BloodPressure'
title = 'Bubble Plot of Diabetes'  # the title is typed in by hand
title_leg = 'BMI_class'            # the legend title is typed in by hand
title_bub = 'Age'

Bubble_Plot_2(df, X, Y, size, kolor, title, title_leg, title_bub)